#library
library(readr)
library(tidyselect)
library(tidyverse)
## Warning in as.POSIXlt.POSIXct(Sys.time()): unable to identify current timezone 'C':
## please set environment variable 'TZ'
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ dplyr 1.0.10
## ✔ tibble 3.1.8 ✔ stringr 1.4.1
## ✔ tidyr 1.2.1 ✔ forcats 0.5.2
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(broom)
library(vroom)
## Warning: package 'vroom' was built under R version 4.2.2
library(gganimate)
## Warning: package 'gganimate' was built under R version 4.2.2
library(maps)
## Warning: package 'maps' was built under R version 4.2.2
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.2.2
library(tinytex)
## Warning: package 'tinytex' was built under R version 4.2.2
# Luxembourg dataset: https://platform.who.int/mortality/countries/country-details/MDB/luxembourg
# World dataset: https://platform.who.int/mortality/themes/theme-details/MDB/noncommunicable-diseases
# Age-structure data: https://ourworldindata.org/age-structure
# Read the datasets and tidy them.
#Mortality in Luxembourg
# Load the Luxembourg mortality CSV. The first nine lines of the file are
# skipped and every column is named explicitly instead of using a header row.
Mortality <- read_csv(
  file = "data/Mortality_Database_Luxembourg.csv",
  col_names = c(
    "Indicator_Code",
    "Indicator_Name",
    "Year",
    "Sex",
    "Age_group_code",
    "Age_Group",
    "Number",
    "Percentage_of_cause_specific_deaths_out_of_total_deaths",
    "Age_standardized_death_rate_per_100000_standard_population",
    "Death_rate_per_100000_population",
    "Dumm"
  ),
  skip = 9,
  show_col_types = TRUE
)
## Rows: 397429 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Indicator_Code, Indicator_Name, Sex, Age_group_code, Age_Group
## dbl (5): Year, Number, Percentage_of_cause_specific_deaths_out_of_total_deat...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Echo the parsed table so its dimensions and column types can be checked.
Mortality
## # A tibble: 397,429 × 10
## Indicato…¹ Indic…² Year Sex Age_g…³ Age_G…⁴ Number Perce…⁵ Age_s…⁶ Death…⁷
## <chr> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CG0000 All Ca… 1967 All Age_all [All] 4154 100 984. 1239.
## 2 CG0000 All Ca… 1967 All Age00 [0] 124 100 NA 2583.
## 3 CG0000 All Ca… 1967 All Age01_… [1-4] 20 100 NA 97.6
## 4 CG0000 All Ca… 1967 All Age05_… [5-9] 18 100 NA 70.3
## 5 CG0000 All Ca… 1967 All Age10_… [10-14] 11 100 NA 45.1
## 6 CG0000 All Ca… 1967 All Age15_… [15-19] 18 100 NA 79.3
## 7 CG0000 All Ca… 1967 All Age20_… [20-24] 22 100 NA 101.
## 8 CG0000 All Ca… 1967 All Age25_… [25-29] 25 100 NA 110.
## 9 CG0000 All Ca… 1967 All Age30_… [30-34] 37 100 NA 161.
## 10 CG0000 All Ca… 1967 All Age35_… [35-39] 65 100 NA 255.
## # … with 397,419 more rows, and abbreviated variable names ¹Indicator_Code,
## # ²Indicator_Name, ³Age_group_code, ⁴Age_Group,
## # ⁵Percentage_of_cause_specific_deaths_out_of_total_deaths,
## # ⁶Age_standardized_death_rate_per_100000_standard_population,
## # ⁷Death_rate_per_100000_population
# Mortality caused by noncommunicable diseases in Luxembourg from 1967
# Load the Luxembourg noncommunicable-disease CSV. Seven leading lines are
# skipped and the columns are named explicitly; the final empty name is a
# placeholder that readr repairs to `...13`.
Noncauminicate_in_luxembourg <- read_csv(
  file = "data/Luxembourg/Luxembourg.csv",
  col_names = c(
    "Region_Code",
    "Region_Name",
    "Country_Code",
    "region",
    "Year",
    "Sex",
    "Age_group_code",
    "Age_Group",
    "Number",
    "Percentage_of_cause_specific_deaths_out_of_total_deaths",
    "Age_standardized_death_rate_per_100_000_standard_population",
    "Death_rate_per_100_000_population",
    ""
  ),
  skip = 7,
  show_col_types = TRUE
)
## New names:
## Rows: 53 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): Region_Code, Region_Name, Country_Code, region, Sex, Age_group_code... dbl
## (5): Year, Number, Percentage_of_cause_specific_deaths_out_of_total_deat...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...13`
# Echo the parsed table so its dimensions and column types can be checked.
Noncauminicate_in_luxembourg
## # A tibble: 53 × 12
## Region_Code Regio…¹ Count…² region Year Sex Age_g…³ Age_G…⁴ Number Perce…⁵
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 EU Europe LUX Luxem… 1967 All Age_all [All] 3407 82.0
## 2 EU Europe LUX Luxem… 1968 All Age_all [All] 3475 84.5
## 3 EU Europe LUX Luxem… 1969 All Age_all [All] 3561 85.2
## 4 EU Europe LUX Luxem… 1970 All Age_all [All] 3507 84.4
## 5 EU Europe LUX Luxem… 1971 All Age_all [All] 3730 84.7
## 6 EU Europe LUX Luxem… 1972 All Age_all [All] 3512 85.2
## 7 EU Europe LUX Luxem… 1973 All Age_all [All] 3542 84.8
## 8 EU Europe LUX Luxem… 1974 All Age_all [All] 3607 84.1
## 9 EU Europe LUX Luxem… 1975 All Age_all [All] 3737 85.4
## 10 EU Europe LUX Luxem… 1976 All Age_all [All] 3775 84.0
## # … with 43 more rows, 2 more variables:
## # Age_standardized_death_rate_per_100_000_standard_population <dbl>,
## # Death_rate_per_100_000_population <dbl>, and abbreviated variable names
## # ¹Region_Name, ²Country_Code, ³Age_group_code, ⁴Age_Group,
## # ⁵Percentage_of_cause_specific_deaths_out_of_total_deaths
# Mortality caused by noncommunicable diseases worldwide
# Load the worldwide noncommunicable-disease CSV. Seven leading lines are
# skipped and the columns are named explicitly; the final empty name is a
# placeholder that readr repairs to `...13`.
Noncauminicate <- read_csv(
  file = "data/WHOMortalityDatabase_Map_Noncommunicable_diseases_14th_December_2022_17_17.csv",
  col_names = c(
    "Region_Code",
    "Region_Name",
    "Country_Code",
    "region",
    "Year",
    "Sex",
    "Age_group_code",
    "Age_Group",
    "Number",
    "Percentage_of_cause_specific_deaths_out_of_total_deaths",
    "Age_standardized_death_rate_per_100_000_standard_population",
    "Death_rate_per_100_000_population",
    ""
  ),
  skip = 7,
  show_col_types = TRUE
)
## New names:
## Rows: 291 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): Region_Code, Region_Name, Country_Code, region, Sex, Age_group_code... dbl
## (4): Year, Number, Percentage_of_cause_specific_deaths_out_of_total_deat... lgl
## (2): Age_standardized_death_rate_per_100_000_standard_population, ...13
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...13`
# Echo the parsed table so its dimensions and column types can be checked.
Noncauminicate
## # A tibble: 291 × 13
## Region_Code Regio…¹ Count…² region Year Sex Age_g…³ Age_G…⁴ Number Perce…⁵
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 EU Europe ALB Alban… 2000 All Age75_… [75-79] 1835 87.5
## 2 EU Europe ALB Alban… 2000 All Age80_… [80-84] 1592 79.0
## 3 EU Europe ALB Alban… 2000 All Age85_… [85+] 1834 67.4
## 4 NAC North … ATG Antig… 2000 All Age75_… [75-79] 51 89.5
## 5 NAC North … ATG Antig… 2000 All Age80_… [80-84] 39 86.7
## 6 NAC North … ATG Antig… 2000 All Age85_… [85+] 64 84.2
## 7 CSA Centra… ARG Argen… 2000 All Age75_… [75-79] 30924 82.4
## 8 CSA Centra… ARG Argen… 2000 All Age80_… [80-84] 28769 80.8
## 9 CSA Centra… ARG Argen… 2000 All Age85_… [85+] 41270 78.1
## 10 AS Asia ARM Armen… 2000 All Age75_… [75-79] 2854 94.6
## # … with 281 more rows, 3 more variables:
## # Age_standardized_death_rate_per_100_000_standard_population <lgl>,
## # Death_rate_per_100_000_population <dbl>, ...13 <lgl>, and abbreviated
## # variable names ¹Region_Name, ²Country_Code, ³Age_group_code, ⁴Age_Group,
## # ⁵Percentage_of_cause_specific_deaths_out_of_total_deaths
#Population by group age Europe
# Load the age-dependency-ratio CSV. Seven leading lines are skipped and the
# columns are named explicitly; the final empty name is a placeholder that
# readr repairs to `...5`.
Population <- read_csv(
  file = "data/age_dependency_ratio_of_working_age_population.csv",
  col_names = c(
    "region",
    "Code",
    "Year",
    "estimates_age",
    ""
  ),
  skip = 7,
  show_col_types = TRUE
)
## New names:
## Rows: 18138 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): region, Code dbl (2): Year, estimates_age
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...5`
# Echo the parsed table so its dimensions and column types can be checked.
Population
## # A tibble: 18,138 × 4
## region Code Year estimates_age
## <chr> <chr> <dbl> <dbl>
## 1 Afghanistan AFG 1956 78.9
## 2 Afghanistan AFG 1957 79.2
## 3 Afghanistan AFG 1958 79.6
## 4 Afghanistan AFG 1959 79.8
## 5 Afghanistan AFG 1960 80.0
## 6 Afghanistan AFG 1961 80.2
## 7 Afghanistan AFG 1962 80.4
## 8 Afghanistan AFG 1963 80.7
## 9 Afghanistan AFG 1964 81.2
## 10 Afghanistan AFG 1965 82
## # … with 18,128 more rows
#We want to see how the mortality rate varies over the course of 1960 to 2021.
# Animated line chart of the all-cause, all-ages death counts per year, with
# one line per Sex value; gganimate reveals the series progressively along Year.
Mortality |>
  filter(Indicator_Name == "All Causes", Age_group_code == "Age_all") |>
  ggplot(aes(x = Year, y = Number, group = Sex)) +
  geom_line(aes(colour = Sex)) +
  geom_point(aes(colour = Sex)) +
  labs(
    x = "Year",
    y = "Number",
    title = "Mortality rate over years in Luxembourg"
  ) +
  transition_reveal(Year)
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
# We shall now analyze a few major causes of mortality in Luxembourg
# and how these causes have affected the different sexes.
# Tally the rows recorded for each cause of death, most frequent first.
count(Mortality, Indicator_Name, sort = TRUE)
## # A tibble: 193 × 2
## Indicator_Name n
## <chr> <int>
## 1 All Causes 2703
## 2 Appendicitis 2703
## 3 Birth asphyxia and birth trauma 2703
## 4 Breast cancer 2703
## 5 Cardiovascular diseases 2703
## 6 Cataracts 2703
## 7 Cerebrovascular disease 2703
## 8 Childhood-cluster diseases 2703
## 9 Chronic obstructive pulmonary disease 2703
## 10 Cirrhosis of the liver 2703
## # … with 183 more rows
# Yearly death counts, one panel per cause, for the causes whose all-ages,
# both-sexes total exceeds 1000 in a given year.
#
# The table stores the totals (Sex == "All", Age_group_code == "Age_all")
# next to the individual age-band rows (and, presumably, per-sex rows).
# geom_col() stacks every row that shares a Year, so without restricting to
# the total rows each bar would pile sub-totals on top of the total and
# overstate the number of deaths.
ggplot(
  data = Mortality |>
    filter(Sex == "All", Age_group_code == "Age_all", Number > 1000)
) +
  geom_col(aes(x = Year, y = Number)) +
  facet_wrap(~Indicator_Name)
# Number of deaths from noncommunicable diseases by age category
# Summarise
#(?<=\\[).*(?=\\])
#Mortality
# Scatter of noncommunicable-disease death counts per age band, coloured by
# Sex. Age_by_group_New holds the first number found in the age-band label
# (74 for the "Age_all" row); the x axis is ordered by the age-band code.
Mortality |>
  filter(Indicator_Name == "Noncommunicable diseases") |>
  mutate(
    Age_by_group_New = as.numeric(str_extract(Age_Group, "(\\d)+")),
    Age_by_group_New = if_else(
      Age_group_code == "Age_all",
      74,
      Age_by_group_New
    ),
    Age_Group = fct_reorder(Age_Group, Age_group_code)
  ) |>
  ggplot(aes(x = Age_Group, y = Number, colour = Sex)) +
  geom_point(size = 2, alpha = 0.5) +
  # geom_line(size = 1) +
  theme_minimal() +
  labs(title = "Yearly Culmulative mortality by NonComunication deseses in Luxembourg") +
  theme(axis.text.x = element_text(angle = 90)) # +
# transition_reveal(Year)
# Find the year in which mortality between ages 54 and 74 was highest in Luxembourg
# Draw one choropleth of Luxembourg per year, filled by that year's death
# rate per 100 000 population.
world_map <- map_data("world")
Luxembourg_map <- world_map %>% filter(region == "Luxembourg")

# Keep only the columns the maps need.
Noncauminicate_in_luxembourg <- Noncauminicate_in_luxembourg %>%
  select(region, Year, Death_rate_per_100_000_population)

# Country outline joined to the yearly rates; not used by the loop below.
Luxembourg_map_join <- left_join(
  Luxembourg_map,
  Noncauminicate_in_luxembourg,
  by = "region"
)
# Luxembourg_map_join

# Upper limit of the fill scale, shared by every year so colours stay
# comparable between maps. Computed once, ignoring missing rates.
max_death_rate <- max(
  Noncauminicate_in_luxembourg$Death_rate_per_100_000_population,
  na.rm = TRUE
)

for (y in unique(Noncauminicate_in_luxembourg$Year)) {
  yearly_map <- Noncauminicate_in_luxembourg %>%
    filter(Year == y) %>%
    ggplot() +
    # Base layer: the country outline, drawn even if the rate is missing.
    geom_map(
      data = Luxembourg_map, map = Luxembourg_map, aes(map_id = region),
      fill = "white", color = "#7f7f7f"
    ) +
    geom_map(
      map = Luxembourg_map,
      aes(map_id = region, fill = Death_rate_per_100_000_population)
    ) +
    scale_fill_gradient(
      low = "white", high = "#cc4c02",
      name = "Death rate\nper 100 000",
      limits = c(0, max_death_rate)
    ) +
    expand_limits(x = Luxembourg_map$long, y = Luxembourg_map$lat) +
    labs(title = paste("Death rate per 100 000 population in Luxembourg,", y))

  # Plots are not auto-printed inside a loop, so print explicitly. The former
  # `+ transition_manual(frames = Year)` was added to print()'s return value
  # and then discarded, so it never produced an animation; each iteration
  # holds a single year anyway.
  print(yearly_map)
}
# Compare mortality caused by noncommunicable diseases in Luxembourg with other countries, ages 54 to 74
# World choropleth of noncommunicable-disease death counts per country.
mapdata <- map_data("world")

# Collapse to ONE row per country before joining. The source table has several
# rows per country (one per age band); joining it as-is repeats every polygon
# vertex once per band and the polygon is then filled with the first band's
# value only. Summing gives the total over the age bands present.
# NOTE(review): assumes a single Year and Sex per country in this extract;
# if that does not hold, filter on them here first.
newDataForNonCauminicate <- Noncauminicate %>%
  filter(!is.na(Number)) %>%
  group_by(region) %>%
  summarise(Number = sum(Number), .groups = "drop")
# newDataForNonCauminicate

# inner_join keeps only countries whose name matches the map's `region`
# spelling exactly; differently spelled countries are silently absent.
mapdata_join <- inner_join(mapdata, newDataForNonCauminicate, by = "region")
newMapdata <- mapdata_join %>% filter(!is.na(Number))

map1 <- ggplot(
  newMapdata,
  aes(x = long, y = lat, fill = Number, group = group)
) +
  geom_polygon(color = "black") +
  theme(panel.background = element_rect(fill = "#101045")) +
  coord_equal()
# map1

map2 <- map1 +
  scale_fill_gradient(
    name = "%Number of dies",
    low = "Yellow", high = "red", na.value = "grey50"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    rect = element_blank()
  )
# map2

# NOTE(review): absolute, machine-specific image path; the script fails on any
# other machine. Consider moving the image next to the data files.
ggdraw() +
  draw_image(
    "C:/Users/user/Documents/Data science/R/DataVisualization/bill.jpg",
    x = 0.35, y = 0.3, scale = .2
  ) +
  draw_plot(map2)
# Life experience in 2000 compared with the rest of the world in 2000 (map), with noncommunicable diseases
# Echo the age-dependency table read from
# data/age_dependency_ratio_of_working_age_population.csv.
Population
## # A tibble: 18,138 × 4
## region Code Year estimates_age
## <chr> <chr> <dbl> <dbl>
## 1 Afghanistan AFG 1956 78.9
## 2 Afghanistan AFG 1957 79.2
## 3 Afghanistan AFG 1958 79.6
## 4 Afghanistan AFG 1959 79.8
## 5 Afghanistan AFG 1960 80.0
## 6 Afghanistan AFG 1961 80.2
## 7 Afghanistan AFG 1962 80.4
## 8 Afghanistan AFG 1963 80.7
## 9 Afghanistan AFG 1964 81.2
## 10 Afghanistan AFG 1965 82
## # … with 18,128 more rows
# Case study of mortality from noncommunicable diseases (linear regression)
# Scatter of noncommunicable-disease death counts per age band, one colour
# per Sex.
ggplot(
  data = Mortality |>
    filter(Indicator_Name == "Noncommunicable diseases") |>
    # Order the age bands by their zero-padded code; the raw labels sort
    # alphabetically ("[10-14]" before "[5-9]").
    mutate(
      Age_Group = factor(
        Age_Group,
        levels = unique(Age_Group[order(Age_group_code)])
      )
    )
) +
  # `colour`, not `fill`: the default point shape is drawn with colour only,
  # so mapping Sex to fill left every point black.
  geom_point(aes(x = Age_Group, y = Number, colour = Sex)) # +
#geom_bar()
#facet_wrap(~Sex)
#Prediction for 2023